-
Notifications
You must be signed in to change notification settings - Fork 15.1k
[AMDGPU] Record old VGPR MSBs in the high bits of s_set_vgpr_msb #165035
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[AMDGPU] Record old VGPR MSBs in the high bits of s_set_vgpr_msb #165035
Conversation
This stack of pull requests is managed by Graphite. Learn more about stacking. |
|
@llvm/pr-subscribers-backend-amdgpu Author: Stanislav Mekhanoshin (rampitec) ChangesFixes: SWDEV-562450 Patch is 44.31 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/165035.diff 6 Files Affected:
diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
index 0be1dd0817605..f9f0bc619d9f7 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerVGPREncoding.cpp
@@ -58,6 +58,8 @@ class AMDGPULowerVGPREncoding {
static constexpr unsigned BitsPerField = 2;
static constexpr unsigned NumFields = 4;
static constexpr unsigned FieldMask = (1 << BitsPerField) - 1;
+ static constexpr unsigned ModeWidth = NumFields * BitsPerField;
+ static constexpr unsigned ModeMask = (1 << ModeWidth) - 1;
using ModeType = PackedVector<unsigned, BitsPerField,
std::bitset<BitsPerField * NumFields>>;
@@ -152,13 +154,21 @@ bool AMDGPULowerVGPREncoding::setMode(ModeTy NewMode, ModeTy Mask,
CurrentMode |= NewMode;
CurrentMask |= Mask;
- MostRecentModeSet->getOperand(0).setImm(CurrentMode);
+ MachineOperand &Op = MostRecentModeSet->getOperand(0);
+
+ // Carry old mode bits from the existing instruction.
+ int64_t OldModeBits = Op.getImm() & (ModeMask << ModeWidth);
+
+ Op.setImm(CurrentMode | OldModeBits);
return true;
}
+ // Record previous mode into high 8 bits of the immediate.
+ int64_t OldModeBits = CurrentMode << ModeWidth;
+
I = handleClause(I);
- MostRecentModeSet =
- BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB)).addImm(NewMode);
+ MostRecentModeSet = BuildMI(*MBB, I, {}, TII->get(AMDGPU::S_SET_VGPR_MSB))
+ .addImm(NewMode | OldModeBits);
CurrentMode = NewMode;
CurrentMask = Mask;
diff --git a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
index 680e7eb3de6be..844649ebb9ae6 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp
@@ -412,7 +412,7 @@ void AMDGPUAsmPrinter::emitInstruction(const MachineInstr *MI) {
*OutStreamer);
if (isVerbose() && MI->getOpcode() == AMDGPU::S_SET_VGPR_MSB) {
- unsigned V = MI->getOperand(0).getImm();
+ unsigned V = MI->getOperand(0).getImm() & 0xff;
OutStreamer->AddComment(
" msbs: dst=" + Twine(V >> 6) + " src0=" + Twine(V & 3) +
" src1=" + Twine((V >> 2) & 3) + " src2=" + Twine((V >> 4) & 3));
diff --git a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
index 013cfeb364048..28b4da8ab9ebb 100644
--- a/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
+++ b/llvm/lib/Target/AMDGPU/MCTargetDesc/AMDGPUMCTargetDesc.cpp
@@ -168,7 +168,7 @@ bool AMDGPUMCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr,
void AMDGPUMCInstrAnalysis::updateState(const MCInst &Inst, uint64_t Addr) {
if (Inst.getOpcode() == AMDGPU::S_SET_VGPR_MSB_gfx12)
- VgprMSBs = Inst.getOperand(0).getImm();
+ VgprMSBs = Inst.getOperand(0).getImm() & 0xff;
else if (isTerminator(Inst))
VgprMSBs = 0;
}
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
index 8a70a8acd28d3..32cc398740d62 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250-t16.mir
@@ -36,7 +36,7 @@ body: |
; GCN-NEXT: v_add_f16_e64 v128.l /*v384.l*/, v129.l /*v385.l*/, v130.l /*v386.l*/
$vgpr384_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr385_lo16, 0, undef $vgpr386_lo16, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x8a
+ ; GCN-NEXT: s_set_vgpr_msb 0x458a
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=0
; GCN-NEXT: v_add_f16_e64 v0.h /*v512.h*/, v1.h /*v513.h*/, v2.h /*v514.h*/
$vgpr512_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr513_hi16, 0, undef $vgpr514_hi16, 0, 0, 0, implicit $exec, implicit $mode
@@ -50,7 +50,7 @@ body: |
; GCN-NEXT: v_add_f16_e64 v128.l /*v640.l*/, v129.l /*v641.l*/, v130.l /*v642.l*/
$vgpr640_lo16 = V_ADD_F16_t16_e64 0, undef $vgpr641_lo16, 0, undef $vgpr642_lo16, 0, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xcf
+ ; GCN-NEXT: s_set_vgpr_msb 0x8acf
; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=0
; GCN-NEXT: v_add_f16_e64 v0.h /*v768.h*/, v1.h /*v769.h*/, v2.h /*v770.h*/
$vgpr768_hi16 = V_ADD_F16_t16_e64 0, undef $vgpr769_hi16, 0, undef $vgpr770_hi16, 0, 0, 0, implicit $exec, implicit $mode
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
index 2f53c6229ed09..b084ad63a4d5c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-lowering-gfx1250.mir
@@ -22,13 +22,13 @@ body: |
$vgpr257 = V_MOV_B32_e32 undef $vgpr510, implicit $exec
; Single bit change
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x4101
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0
; GCN-NEXT: v_rcp_f32_e64 v255, v2 /*v258*/
$vgpr255 = V_RCP_F32_e64 0, undef $vgpr258, 0, 0, implicit $exec, implicit $mode
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: v_rcp_f32_e64 v255, v1
$vgpr255 = V_RCP_F32_e64 0, undef $vgpr1, 0, 0, implicit $exec, implicit $mode
@@ -40,7 +40,7 @@ body: |
; GCN-NEXT: v_add_nc_u32_e32 v0, v253 /*v509*/, v252 /*v508*/
$vgpr0 = V_ADD_U32_e32 undef $vgpr509, undef $vgpr508, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x544
; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT: v_add_f32_e64 v2 /*v258*/, v0, v251 /*v507*/
@@ -48,7 +48,7 @@ body: |
; VOP3
- ; GCN-NEXT: s_set_vgpr_msb 0x55
+ ; GCN-NEXT: s_set_vgpr_msb 0x4455
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1
; GCN-NEXT: v_fma_f32 v3 /*v259*/, v4 /*v260*/, v5 /*v261*/, v6 /*v262*/
$vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode
@@ -58,32 +58,32 @@ body: |
$vgpr259 = V_FMA_F32_e64 0, undef $vgpr260, 0, undef $vgpr261, 0, undef $vgpr262, 0, 0, implicit $exec, implicit $mode
; Tuple crossing the 256 boundary
- ; GCN-NEXT: s_set_vgpr_msb 17
+ ; GCN-NEXT: s_set_vgpr_msb 0x5511
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1
; GCN-NEXT: v_mqsad_u32_u8 v[254:257], v[2:3] /*v[258:259]*/, v0, v[244:247] /*v[500:503]*/
$vgpr254_vgpr255_vgpr256_vgpr257 = V_MQSAD_U32_U8_e64 $vgpr258_vgpr259, $vgpr0, undef $vgpr500_vgpr501_vgpr502_vgpr503, 0, implicit $exec
; DPP/tied operand
- ; GCN-NEXT: s_set_vgpr_msb 0x45
+ ; GCN-NEXT: s_set_vgpr_msb 0x1145
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=0
; GCN-NEXT: s_delay_alu instid0(VALU_DEP_1)
; GCN-NEXT: v_add_nc_u16_e64_dpp v0 /*v256*/, v1 /*v257*/, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
$vgpr256 = V_ADD_NC_U16_fake16_e64_dpp $vgpr256, 0, $vgpr257, 0, undef $vgpr258, 0, 0, 1, 15, 15, 1, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 17
+ ; GCN-NEXT: s_set_vgpr_msb 0x4511
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=1
; GCN-NEXT: v_add3_u32_e64_dpp v0, v1 /*v257*/, v0, v2 /*v258*/ quad_perm:[1,0,0,0] row_mask:0xf bank_mask:0xf bound_ctrl:1
$vgpr0 = V_ADD3_U32_e64_dpp $vgpr0, $vgpr257, $vgpr0, undef $vgpr258, 1, 15, 15, 1, implicit $exec
; DS (addr, data0, and data1 operands)
- ; GCN-NEXT: s_set_vgpr_msb 20
+ ; GCN-NEXT: s_set_vgpr_msb 0x1114
; ASM-SAME: ; msbs: dst=0 src0=0 src1=1 src2=1
; GCN-NEXT: ds_store_2addr_b32 v0, v248 /*v504*/, v249 /*v505*/ offset1:1
DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr504, undef $vgpr505, 0, 1, 0, implicit $exec
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x1400
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: ds_store_2addr_b32 v0, v248, v249 offset1:1
DS_WRITE2_B32_gfx9 $vgpr0, undef $vgpr248, undef $vgpr249, 0, 1, 0, implicit $exec
@@ -93,13 +93,13 @@ body: |
; GCN-NEXT: ds_load_b32 v0, v255 /*v511*/
$vgpr0 = DS_READ_B32_gfx9 $vgpr511, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x144
; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
; GCN-NEXT: ds_add_rtn_u32 v255 /*v511*/, v0, v248 /*v504*/
$vgpr511 = DS_ADD_RTN_U32_gfx9 $vgpr0, undef $vgpr504, 0, 0, implicit $exec
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4400
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: ds_add_rtn_u32 v0, v0, v0
$vgpr0 = DS_ADD_RTN_U32_gfx9 $vgpr0, $vgpr0, 0, 0, implicit $exec
@@ -111,17 +111,17 @@ body: |
; GCN-NEXT: global_load_b32 v2, v[2:3] /*v[258:259]*/, off
$vgpr2 = GLOBAL_LOAD_DWORD undef $vgpr258_vgpr259, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 64
+ ; GCN-NEXT: s_set_vgpr_msb 0x140
; ASM-SAME: ; msbs: dst=1 src0=0 src1=0 src2=0
; GCN-NEXT: global_load_b32 v255 /*v511*/, v0, s[0:1]
$vgpr511 = GLOBAL_LOAD_DWORD_SADDR undef $sgpr0_sgpr1, $vgpr0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x4001
; ASM-SAME: ; msbs: dst=0 src0=1 src1=0 src2=0
; GCN-NEXT: scratch_load_u8 v0, v255 /*v511*/, s0
$vgpr0 = SCRATCH_LOAD_UBYTE_SVS $vgpr511, undef $sgpr0, 0, 0, implicit $exec, implicit $flat_scr
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: global_store_b32 v[0:1], v2, off
GLOBAL_STORE_DWORD $vgpr0_vgpr1, $vgpr2, 0, 0, implicit $exec
@@ -135,13 +135,13 @@ body: |
; GCN-NEXT: global_store_b96 v[0:1] /*v[256:257]*/, v[244:246] /*v[500:502]*/, off
GLOBAL_STORE_DWORDX3 $vgpr256_vgpr257, $vgpr500_vgpr501_vgpr502, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x544
; ASM-SAME: ; msbs: dst=1 src0=0 src1=1 src2=0
; GCN-NEXT: flat_atomic_add_u32 v254 /*v510*/, v[0:1], v255 /*v511*/ th:TH_ATOMIC_RETURN
$vgpr510 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr511, 0, 1, implicit $exec, implicit $flat_scr
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4400
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: flat_atomic_add_u32 v0, v[0:1], v255 th:TH_ATOMIC_RETURN
$vgpr0 = FLAT_ATOMIC_ADD_RTN $vgpr0_vgpr1, $vgpr255, 0, 1, implicit $exec, implicit $flat_scr
@@ -156,12 +156,12 @@ body: |
; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0, s[8:11], s3 offen
$vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr0, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x41
+ ; GCN-NEXT: s_set_vgpr_msb 0x4041
; ASM-SAME: ; msbs: dst=1 src0=1 src1=0 src2=0
; GCN-NEXT: buffer_load_b32 v1 /*v257*/, v0 /*v256*/, s[8:11], s3 offen
$vgpr257 = BUFFER_LOAD_DWORD_VBUFFER_OFFEN $vgpr256, undef $sgpr8_sgpr9_sgpr10_sgpr11, undef $sgpr3, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: buffer_store_b32 v0, v1, s[0:3], s3 offen
BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec
@@ -171,7 +171,7 @@ body: |
; GCN-NEXT: buffer_store_b32 v0 /*v256*/, v1 /*v257*/, s[0:3], s3 offen
BUFFER_STORE_DWORD_VBUFFER_OFFEN $vgpr256, $vgpr257, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4100
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: buffer_atomic_add_f32 v0, v1, s[0:3], s3 offen
BUFFER_ATOMIC_ADD_F32_VBUFFER_OFFEN $vgpr0, $vgpr1, undef $sgpr0_sgpr1_sgpr2_sgpr3, undef $sgpr3, 0, 0, implicit $exec
@@ -183,44 +183,44 @@ body: |
; VGPRs above 512
- ; GCN-NEXT: s_set_vgpr_msb 0xaa
+ ; GCN-NEXT: s_set_vgpr_msb 0x41aa
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=2
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xab
+ ; GCN-NEXT: s_set_vgpr_msb 0xaaab
; ASM-SAME: ; msbs: dst=2 src0=3 src1=2 src2=2
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v0 /*v768*/, v2 /*v514*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr768, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xae
+ ; GCN-NEXT: s_set_vgpr_msb 0xabae
; ASM-SAME: ; msbs: dst=2 src0=2 src1=3 src2=2
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v770*/, v3 /*v515*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr770, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xba
+ ; GCN-NEXT: s_set_vgpr_msb 0xaeba
; ASM-SAME: ; msbs: dst=2 src0=2 src1=2 src2=3
; GCN-NEXT: v_fma_f32 v0 /*v512*/, v1 /*v513*/, v2 /*v514*/, v3 /*v771*/
$vgpr512 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xea
+ ; GCN-NEXT: s_set_vgpr_msb 0xbaea
; ASM-SAME: ; msbs: dst=3 src0=2 src1=2 src2=2
; GCN-NEXT: v_fma_f32 v255 /*v1023*/, v1 /*v513*/, v2 /*v514*/, v3 /*v515*/
$vgpr1023 = V_FMA_F32_e64 0, undef $vgpr513, 0, undef $vgpr514, 0, undef $vgpr515, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0xff
+ ; GCN-NEXT: s_set_vgpr_msb 0xeaff
; ASM-SAME: ; msbs: dst=3 src0=3 src1=3 src2=3
; GCN-NEXT: v_fma_f32 v0 /*v768*/, v1 /*v769*/, v2 /*v770*/, v3 /*v771*/
$vgpr768 = V_FMA_F32_e64 0, undef $vgpr769, 0, undef $vgpr770, 0, undef $vgpr771, 0, 0, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x42
+ ; GCN-NEXT: s_set_vgpr_msb 0xff42
; ASM-SAME: ; msbs: dst=1 src0=2 src1=0 src2=0
; GCN-NEXT: v_mov_b32_e32 v0 /*v256*/, v0 /*v512*/
$vgpr256 = V_MOV_B32_e32 undef $vgpr512, implicit $exec
; Reset
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x4200
; ASM-SAME: ; msbs: dst=0 src0=0 src1=0 src2=0
; GCN-NEXT: v_fma_f32 v0, v1, v2, v3
$vgpr0 = V_FMA_F32_e64 0, undef $vgpr1, 0, undef $vgpr2, 0, undef $vgpr3, 0, 0, implicit $exec, implicit $mode
@@ -232,12 +232,12 @@ body: |
; GCN-NEXT: global_store_b96 v[0:1] /*v[512:513]*/, v[0:2] /*v[512:514]*/, off
GLOBAL_STORE_DWORDX3 $vgpr512_vgpr513, $vgpr512_vgpr513_vgpr514, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 11
+ ; GCN-NEXT: s_set_vgpr_msb 0xa0b
; ASM-SAME: ; msbs: dst=0 src0=3 src1=2 src2=0
; GCN-NEXT: global_store_b64 v[254:255] /*v[1022:1023]*/, v[254:255] /*v[766:767]*/, off
GLOBAL_STORE_DWORDX2 $vgpr1022_vgpr1023, $vgpr766_vgpr767, 0, 0, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x55
+ ; GCN-NEXT: s_set_vgpr_msb 0xb55
; ASM-SAME: ; msbs: dst=1 src0=1 src1=1 src2=1
; GCN-NEXT: v_wmma_f32_16x16x32_bf16 v[14:21] /*v[270:277]*/, v[26:33] /*v[282:289]*/, v[34:41] /*v[290:297]*/, v[14:21] /*v[270:277]*/
early-clobber $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277 = V_WMMA_F32_16X16X32_BF16_w32_twoaddr 8, undef $vgpr282_vgpr283_vgpr284_vgpr285_vgpr286_vgpr287_vgpr288_vgpr289, 8, undef $vgpr290_vgpr291_vgpr292_vgpr293_vgpr294_vgpr295_vgpr296_vgpr297, 8, killed undef $vgpr270_vgpr271_vgpr272_vgpr273_vgpr274_vgpr275_vgpr276_vgpr277, 0, 0, 0, 0, implicit $exec
@@ -247,6 +247,7 @@ body: |
...
# ASM-LABEL: {{^}}vopd:
+
# DIS-LABEL: <vopd>:
---
name: vopd
@@ -262,35 +263,35 @@ body: |
; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, v1, v2 :: v_dual_mul_f32 v0 /*v256*/, v3, v4
$vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr2, undef $vgpr3, undef $vgpr4, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0x41
+ ; GCN-NEXT: s_set_vgpr_msb 0x4041
; GCN-NEXT: v_dual_sub_f32 v244 /*v500*/, s1, v2 :: v_dual_mul_f32 v0 /*v256*/, v44 /*v300*/, v4
$vgpr500, $vgpr256 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $sgpr1, undef $vgpr2, undef $vgpr300, undef $vgpr4, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 4
+ ; GCN-NEXT: s_set_vgpr_msb 0x4104
; GCN-NEXT: v_dual_sub_f32 v255, v1, v44 /*v300*/ :: v_dual_mul_f32 v6, v0, v1 /*v257*/
$vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 undef $vgpr1, undef $vgpr300, undef $vgpr0, $vgpr257, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 1
+ ; GCN-NEXT: s_set_vgpr_msb 0x401
; GCN-NEXT: v_dual_sub_f32 v255, 0, v1 :: v_dual_mul_f32 v6, v44 /*v300*/, v3
$vgpr255, $vgpr6 = V_DUAL_SUB_F32_e32_X_MUL_F32_e32_gfx1250 0, undef $vgpr1, undef $vgpr300, undef $vgpr3, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 64
+ ; GCN-NEXT: s_set_vgpr_msb 0x140
; GCN-NEXT: v_dual_fmamk_f32 v243 /*v499*/, v0, 0xa, v3 :: v_dual_fmac_f32 v0 /*v256*/, v1, v1
$vgpr499, $vgpr256 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr3, undef $vgpr1, undef $vgpr1, $vgpr256, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 5
+ ; GCN-NEXT: s_set_vgpr_msb 0x4005
; GCN-NEXT: v_dual_mov_b32 v2, v3 /*v259*/ :: v_dual_add_f32 v3, v1 /*v257*/, v2 /*v258*/
$vgpr2, $vgpr3 = V_DUAL_MOV_B32_e32_X_ADD_F32_e32_gfx1250 undef $vgpr259, undef $vgpr257, undef $vgpr258, implicit $exec, implicit $mode
- ; GCN-NEXT: s_set_vgpr_msb 0x44
+ ; GCN-NEXT: s_set_vgpr_msb 0x544
; GCN-NEXT: v_dual_fmamk_f32 v244 /*v500*/, v0, 0xa, v44 /*v300*/ :: v_dual_fmac_f32 v3 /*v259*/, v1, v1 /*v257*/
$vgpr500, $vgpr259 = V_DUAL_FMAMK_F32_X_FMAC_F32_e32_gfx1250 undef $vgpr0, 10, undef $vgpr300, undef $vgpr1, undef $vgpr257, $vgpr259, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 16
+ ; GCN-NEXT: s_set_vgpr_msb 0x4410
; GCN-NEXT: v_dual_fma_f32 v0, v6, v6, v44 /*v300*/ :: v_dual_fma_f32 v1, v4, v5, v45 /*v301*/
$vgpr0, $vgpr1 = V_DUAL_FMA_F32_e64_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, 0, undef $vgpr300, 0, undef $vgpr4, 0, undef $vgpr5, 0, undef $vgpr301, implicit $mode, implicit $exec
- ; GCN-NEXT: s_set_vgpr_msb 0
+ ; GCN-NEXT: s_set_vgpr_msb 0x1000
; GCN-NEXT: v_dual_fmac_f32 v2, v6, v6 :: v_dual_fma_f32 v3, v4, v5, v3
$vgpr2, $vgpr3 = V_DUAL_FMAC_F32_e32_X_FMA_F32_e64_e96_gfx1250 0, undef $vgpr6, 0, undef $vgpr6, undef $vgpr2, 0, undef $vgpr4, 0, undef $vgpr5, 0, $vgpr3, implicit $mode, implicit $exec
@@ -298,7 +299,7 @@ body: |
; GCN-NEXT: v_dual_fma_f32 ...
[truncated]
|
02619b7 to
46849f5
Compare
80fd780 to
6bcab85
Compare
|
Can you say anything at all about why this is done? |
This is the new FW requirement. |
Fixes: SWDEV-562450
6bcab85 to
a6b1802
Compare
…_msbs_in_the_high_bits_of_s_set_vgpr_msb
…m#165035) Fixes: SWDEV-562450

Fixes: SWDEV-562450